Set Up
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.4
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(ggthemes)
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(forcats)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(readxl)
library(RColorBrewer)
library(colourpicker)
dashboard <- read_excel("/Users/rachelhammond/Documents/Berkeley/2ndSemester/ds4pp/Labs/midterm/CFDashboardData2.xlsx")
summary <- read_excel("/Users/rachelhammond/Documents/Berkeley/2ndSemester/ds4pp/Labs/midterm/midterm data.xlsx")
urbanicity <- read_excel("/Users/rachelhammond/Documents/Berkeley/2ndSemester/ds4pp/Labs/midterm/urbanicity.xlsx")
calirace <- read_excel("/Users/rachelhammond/Documents/Berkeley/2ndSemester/ds4pp/Labs/midterm/countyracedata.xlsx")
ihss <- read_excel("/Users/rachelhammond/Documents/Berkeley/2ndSemester/ds4pp/Labs/midterm/ihsspop.xlsx")
jandata <- read_excel("/Users/rachelhammond/Documents/Berkeley/2ndSemester/ds4pp/Labs/midterm/jandata.xlsx")
clientcolors <- c("#f98d4e", "#ffc500","#82b64d", "#107652", "#dd323f")
Data Cleaning
dashboard <- dashboard %>%
mutate(american_indian = as.double(american_indian)) %>%
mutate(Black = as.double(Black),
other_race = as.double(other_race),
race_unknown = as.double(race_unknown),
White = as.double(White),
more_than_one_race = as.double(more_than_one_race),
Hispanic = as.double(Hispanic),
Asian = as.double(Asian),
pacific_islander = as.double(pacific_islander))
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
Take Up Plots
plotlyex <- summary %>%
mutate(county = fct_reorder(county, takeupdecimal)) %>%
ggplot(mapping = aes(x = county, y = takeupdecimal)) +
geom_point(color = "#107652") +
geom_hline(yintercept = .75, color = "#f98d4e") +
labs(y = "",
x= "",
title = "Most counties have take up rates between 50% and 70%",
caption = "Orange line indicates 75% take up goal set by CDSS.") +
coord_flip() +
theme_minimal() +
scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
theme(axis.text.y = element_text(size = 5))
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
ggplotly(plotlyex)
summary %>%
filter(county %in% c("Los Angeles", "San Bernardino", "San Diego", "Ventura", "San Francisco", "Shasta", "Humboldt", "Sacramento", "Tulare", "Orange", "Monterey", "San Mateo")) %>%
mutate(county = fct_reorder(county, takeupdecimal)) %>%
ggplot(mapping = aes(x = county, y = takeupdecimal, fill = takeupdecimal)) +
geom_col() +
geom_hline(yintercept = .75, color = "#f98d4e", size = 2) +
labs(x = "",
y = "",
title = "My IPA group is focusing on 12 counties",
caption = "Orange line indicates 75% take up goal set by CDSS.") +
coord_flip() +
scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
theme_classic() +
scale_fill_gradient(low = "gray93", high = "#107652") +
theme(legend.position = "none")

dashboard %>%
filter(county == "Statewide") %>%
filter(month != "May") %>%
ggplot(mapping = aes(x = fct_reorder(month, date), y = New_SSI_Approved/1000)) +
geom_bar(stat = "identity", fill = "#107652") +
labs(x ="",
y= "Applications Approved (000's)",
title= "California enrolled 45% of the eligible SSI population in the first three months") +
theme_classic()

Race and Language
bycounty <- dashboard %>%
group_by(county) %>%
filter(county != "Statewide") %>%
summarize(totapproved = sum(New_SSI_Approved),
english = sum(english),
spanish = sum(spanish),
other = sum(armenian, farsi, korean, russian, cantonese, vietnamese, cambodian, mandarin, other_language))
bycounty <- bycounty %>%
left_join(summary, by = "county")
bycounty[is.na(bycounty)] <- 0
bycounty <- bycounty %>%
mutate(English = english/totapproved) %>%
mutate(Spanish = spanish/totapproved) %>%
mutate(Other = other.x/totapproved) %>%
mutate(all_other_race = morethanone + pacific_islander + american_indian + other.y + unknown)
languages <- bycounty %>%
select(county, takeup, English, Spanish, Other) %>%
pivot_longer(-c(county, takeup), names_to = "language", values_to = "percent")
languages <- languages %>%
mutate(langvalue = case_when(language == "English" ~ 1,
language == "Spanish" ~ 2,
language == "Other" ~ 3))
languages %>%
filter(county %in% c("Los Angeles", "San Bernardino", "San Diego", "Ventura", "San Francisco", "Shasta", "Humboldt", "Sacramento", "Tulare", "Orange", "Monterey", "San Mateo")) %>%
mutate(county = fct_reorder(county, takeup)) %>%
ggplot(mapping = aes(x = county, y = percent, fill = fct_reorder(language, -langvalue))) +
geom_bar(stat = "identity", position = "fill", alpha = .7) +
coord_flip() +
labs(x = "",
y = "",
title = "Most counties have a majority of take up by English speakers",
fill = "") +
scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
scale_fill_manual(values = clientcolors, guide = guide_legend(reverse = TRUE)) +
theme_classic() +
theme(legend.position = "bottom")

race <- bycounty %>%
select(county, takeup, White, Black, Asian, Hispanic, all_other_race) %>%
pivot_longer(-c(county,takeup), names_to = "race", values_to = "percent")
race<- race %>%
mutate(racevalue = case_when(race == "White" ~ 1,
race == "Hispanic" ~ 2,
race == "Asian" ~ 3,
race == "Black" ~ 4,
race == "all_other_race" ~ 5))
race %>%
filter(county %in% c("Los Angeles", "San Bernardino", "San Diego", "Ventura", "San Francisco", "Shasta", "Humboldt", "Sacramento", "Tulare", "Orange", "Monterey", "San Mateo")) %>%
mutate(county = fct_reorder(county, takeup)) %>%
ggplot(mapping = aes(x = county, y = percent, fill = fct_reorder(race, -racevalue))) +
geom_bar(stat = "identity", position = "fill", alpha = .7) +
coord_flip() +
labs(x = "",
y = "",
fill = "",
title = "Racial compostion of recipients varies by county") +
scale_fill_manual(values = clientcolors,
labels = c("Other", "Black", "Asian", "Hispanic", "White"),
guide = guide_legend(reverse = TRUE)) +
scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
theme_classic() +
theme(legend.position = "bottom")

ihss <- ihss %>%
mutate(racevalue = case_when(race == "White" ~ 1,
race == "Hispanic" ~ 2,
race == "Asian" ~ 3,
race == "Black" ~ 4,
race == "Other" ~ 5))
ihss %>%
mutate(county = fct_reorder(county, takeup)) %>%
ggplot(mapping = aes(x = county, y = percent, fill = fct_reorder(race, -racevalue))) +
geom_bar(stat = "identity", position = "fill", alpha = .7) +
coord_flip() +
labs(x = "",
y = "",
fill = "",
title = "IHSS race breakdown") +
scale_fill_manual(values = clientcolors, guide = guide_legend(reverse = TRUE)) +
scale_y_continuous(labels = scales::percent_format(accuracy=1)) +
theme_classic() +
theme(legend.position = "bottom")

Map
theme_map <- function(base_size=9, base_family="") {
require(grid)
theme_bw(base_size=base_size, base_family=base_family) %+replace%
theme(axis.line=element_blank(),
axis.text=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank(),
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid=element_blank(),
panel.spacing=unit(0, "lines"),
plot.background=element_blank(),
legend.justification = c(0,0),
legend.position = c(0,0)
)
}
counties <- map_data("county")
summary <- summary %>%
mutate(subregion = tolower(county))
county_eligible <- counties %>%
left_join(summary, by = "subregion")
calicounty <- county_eligible %>%
filter(region == "california") %>%
ggplot(aes(x = long, y = lat,
group = group)) +
geom_polygon(color = "gray90", size = 0.1) +
coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
guides(fill = FALSE) +
theme_map()
## Loading required package: grid
calicounty2 <- county_eligible %>%
filter(region == "california") %>%
ggplot(aes(x = long, y = lat, group = group, fill = takeupdecimal)) +
geom_polygon(color = "gray90", size = 0.1) +
coord_map(projection = "albers", lat0 = 32.5, lat1 = 43) +
theme_map() +
labs(title = "Take up rates vary widely across counties", fill = "Take Up Rate") +
scale_fill_gradient(low = "gray93", high = "#107652",
labels = scales::percent_format(accuracy = 1))
calicounty2
